{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import torch\n",
    "from torch.utils.data import DataLoader\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "\n",
    "import sys\n",
    "\n",
    "sys.path.insert(0, \"lib/\")\n",
    "from data.coco_dataset import CocoDataset\n",
    "from utils.preprocess_sample import preprocess_sample\n",
    "from utils.collate_custom import collate_custom\n",
    "from utils.utils import to_cuda_variable\n",
    "import utils.result_utils as result_utils\n",
    "from utils.json_dataset_evaluator import evaluate_boxes, evaluate_masks\n",
    "from model.detector import detector\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Pretrained model\n",
    "# https://s3-us-west-2.amazonaws.com/detectron/35859007/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_2x.yaml.01_49_07.By8nQcCH/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\n",
    "arch = 'resnet50'\n",
    "pretrained_model_file = 'files/trained_models/mask/e2e_mask_rcnn_R-50-FPN_2x.pth'\n",
    "\n",
    "# COCO minival2014 dataset path\n",
    "coco_ann_file = 'datasets/data/coco/annotations/instances_minival2014.json'\n",
    "img_dir = 'datasets/data/coco/val2014'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "loading annotations into memory...\n",
      "Done (t=1.12s)\n",
      "creating index...\n",
      "index created!\n"
     ]
    }
   ],
   "source": [
    "dataset = CocoDataset(ann_file=coco_ann_file, img_dir=img_dir,\n",
    "                      sample_transform=preprocess_sample(target_sizes=[800]))\n",
    "dataloader = DataLoader(dataset, batch_size=1,  # only batch_size=1 is supported by now\n",
    "                        shuffle=False, num_workers=0, collate_fn=collate_custom)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create detector model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "loading pretrained weights\n"
     ]
    }
   ],
   "source": [
    "model = detector(arch=arch,\n",
    "                 use_rpn_head=True,\n",
    "                 use_mask_head=True)\n",
    "state_dict = torch.load(f=pretrained_model_file)\n",
    "model.load_state_dict(state_dict)\n",
    "model = model.eval().cuda()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Evaluate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Create data structure to store results\n",
    "all_boxes, all_segms, all_keyps = result_utils.empty_results(dataset.num_classes, len(dataset)) \n",
    "# (only all_boxes will be used for fast RCNN)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1/5000\n",
      "101/5000\n",
      "201/5000\n",
      "301/5000\n",
      "401/5000\n",
      "501/5000\n",
      "601/5000\n",
      "701/5000\n",
      "801/5000\n",
      "901/5000\n",
      "1001/5000\n",
      "1101/5000\n",
      "1201/5000\n",
      "1301/5000\n",
      "1401/5000\n",
      "1501/5000\n",
      "1601/5000\n",
      "1701/5000\n",
      "1801/5000\n",
      "1901/5000\n",
      "2001/5000\n",
      "2101/5000\n",
      "2201/5000\n",
      "2301/5000\n",
      "2401/5000\n",
      "2501/5000\n",
      "2601/5000\n",
      "2701/5000\n",
      "2801/5000\n",
      "2901/5000\n",
      "3001/5000\n",
      "3101/5000\n",
      "3201/5000\n",
      "3301/5000\n",
      "3401/5000\n",
      "3501/5000\n",
      "3601/5000\n",
      "3701/5000\n",
      "3801/5000\n",
      "3901/5000\n",
      "4001/5000\n",
      "4101/5000\n",
      "4201/5000\n",
      "4301/5000\n",
      "4401/5000\n",
      "4501/5000\n",
      "4601/5000\n",
      "4701/5000\n",
      "4801/5000\n",
      "4901/5000\n",
      "Done!\n"
     ]
    }
   ],
   "source": [
    "# Compute detections for whole dataset\n",
    "from tqdm import tqdm\n",
    "for batch in tqdm(dataloader):\n",
    "    batch = to_cuda_variable(batch)\n",
    "    # forward pass\n",
    "    with torch.no_grad():\n",
    "        class_scores, bbox_deltas, rois, img_features = model(batch['image'],\n",
    "                                                              scaling_factor=batch['scaling_factors'])\n",
    "        class_scores = torch.nn.functional.softmax(class_scores, dim=1)\n",
    "        # postprocess output:\n",
    "    # - convert coordinates back to original image size, \n",
    "    # - treshold proposals based on score,\n",
    "    # - do NMS.\n",
    "    scores_final, boxes_final, boxes_per_class = result_utils.postprocess_output(rois,\n",
    "                                                                                 batch['scaling_factors'],\n",
    "                                                                                 batch['original_im_size'],\n",
    "                                                                                 class_scores,\n",
    "                                                                                 bbox_deltas)\n",
    "    if len(boxes_final) == 0:\n",
    "        continue\n",
    "\n",
    "    # compute masks\n",
    "    boxes_final_th = torch.cuda.FloatTensor(boxes_final) * batch['scaling_factors']\n",
    "    masks = model.mask_head(img_features, boxes_final_th)\n",
    "    # postprocess mask output:\n",
    "    h_orig = int(batch['original_im_size'].squeeze()[0].data.cpu().numpy().item())\n",
    "    w_orig = int(batch['original_im_size'].squeeze()[1].data.cpu().numpy().item())\n",
    "    cls_segms = result_utils.segm_results(boxes_per_class, masks.cpu().data.numpy(), boxes_final, h_orig, w_orig)\n",
    "\n",
    "    # store results\n",
    "    result_utils.extend_results(i, all_boxes, boxes_per_class)\n",
    "    result_utils.extend_results(i, all_segms, cls_segms)\n",
    "\n",
    "print('Done!')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# # Save detection and segmentation results\n",
    "np.save('files/results/all_boxes_segms_mask.npy',{'all_boxes': all_boxes, 'all_segms': all_segms})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading and preparing results...\n",
      "DONE (t=1.53s)\n",
      "creating index...\n",
      "index created!\n",
      "Running per image evaluation...\n",
      "Evaluate annotation type *bbox*\n",
      "DONE (t=39.89s).\n",
      "Accumulating evaluation results...\n",
      "DONE (t=5.97s).\n",
      " Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.378\n",
      " Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.581\n",
      " Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.410\n",
      " Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.207\n",
      " Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.419\n",
      " Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.521\n",
      " Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.317\n",
      " Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.485\n",
      " Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.503\n",
      " Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.302\n",
      " Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.551\n",
      " Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.672\n"
     ]
    }
   ],
   "source": [
    "# Compute evaluation metrics\n",
    "coco_box_eval = evaluate_boxes(json_dataset=dataset.coco, \n",
    "                           all_boxes=all_boxes, \n",
    "                           output_dir='files/results/',\n",
    "                           use_salt=False, cleanup=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading and preparing results...\n",
      "DONE (t=4.94s)\n",
      "creating index...\n",
      "index created!\n",
      "Running per image evaluation...\n",
      "Evaluate annotation type *segm*\n",
      "DONE (t=44.14s).\n",
      "Accumulating evaluation results...\n",
      "DONE (t=6.07s).\n",
      " Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.328\n",
      " Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.545\n",
      " Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.345\n",
      " Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.133\n",
      " Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.360\n",
      " Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.511\n",
      " Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.284\n",
      " Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.427\n",
      " Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.442\n",
      " Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.235\n",
      " Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.489\n",
      " Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.628\n"
     ]
    }
   ],
   "source": [
    "coco_segm_eval = evaluate_masks(json_dataset=dataset.coco,\n",
    "                                all_boxes=all_boxes,\n",
    "                                all_segms=all_segms,\n",
    "                                output_dir='files/results/',\n",
    "                                use_salt=False, cleanup=False)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python (detectorch0.3)",
   "language": "python",
   "name": "detectorch03"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
